
# =============================================================
# File: App_G_matching.R
# Purpose: Produces results from Appendix Section G: Matching Analysis
# Paper: Foreignness as an Asset: European Carbon Regulation and the Relocation Threat among Multinational Firms
# Author: Patrick Bayer, patrick.bayer@strath.ac.uk
# Date: 5 October 2022
#
# Data: ./data.csv
#
# Technical disclaimer:
# All analyses in R version 4.1.2 (2021-11-01)
# RStudio 2022.07.1 Build 554 ("Spotted Wakerobin" Release (7872775e, 2022-07-22) for Windows)
# Windows 10 Enterprise, 64-bit
# 12th Gen Intel(R) Core(TM) i7-1270P 2.20 GHz with 32GB RAM
# =============================================================

library(lmtest)
library(sandwich)
library(MASS)
library(MatchIt)
library(cem)
library(cobalt)
library(cowplot)
library(ggpubr)
library(marginaleffects)
library(AER)
library(tidyverse)

# Load data
df <- read_csv(file = "./data.csv")

# Trim sample down to true within firm sample
# (1) Limit sample to European MNCs: no allocation data for domestically-owned operations for non-EU MNCs
# (2) Exclude MNCs with only foreign-owned operations as there is no within MNC variation

df$sample <- ifelse(df$mnc.eu==1 & df$foreign.share<1,1,0)
df <- df[df$sample==1,]
df$foreign <- as.factor(df$foreign) # set variable as categorical

# =============================================================
# APPENDIX G:  Matching analysis
# =============================================================

# The main text presents results from three additional matching analyses
# (1) Mahalanobis distance matching for the allocation variable
# (2) Mahalanobis distance matching for the emissions variable
# (3) Mahalanobis distance matching for both variables simultaneously

# =============================================================
# Mahalanobis distance matching: permit allocation variable
# =============================================================

# Matching data set
df.match <- df[,c("logDV","logAlloc0","foreign", "logEmit0","ETSsector","iso2","BVD", "mnc", "sample")]
df.match <- data.frame(na.omit(df.match))


# Check data before matching
m.out0 <- matchit(foreign ~ logAlloc0, data=df.match, method=NULL, distance="glm")

# Matching
m.out1 <- matchit(as.numeric(foreign) ~ logAlloc0, data=df.match, method="nearest", distance="mahalanobis")
df.match.post <- match.data(m.out1)
dim(df.match.post)

# Improvment in standardized mean differences
summary(m.out0)
summary(m.out1)

# Balance improvement in percent (SMD)
summary(m.out1)$sum.matched[,3]/summary(m.out1)$sum.all[,3]-1

# Improvement in univariate imbalance
imbalance(group=df.match$foreign, data=df.match[c("logAlloc0")])
imbalance(group=df.match.post$foreign, data=df.match.post[c("logAlloc0")])

# Balance improvement in percent (L1)
imbalance(group=df.match.post$foreign, data=df.match.post["logAlloc0"])$tab$L1/imbalance(group=df.match$foreign, data=df.match["logAlloc0"])$tab$L1-1


# =============================================================
# Model (1)
# =============================================================

mmm1 <- lm(logDV~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(BVD), data=df.match.post, weights=weights)
summary(mmm1)

# Observations
nobs(mmm1)
length(mmm1$xlevels$`as.factor(iso2)`)
length(mmm1$xlevels$`as.factor(BVD)`)

# Marginal effects
(exp(summary(mmm1)$coef[["foreign1","Estimate"]])-1)*100

# Standard errors
(exp(coefci(mmm1, vcov. = vcovHC, type="HC0")["foreign1",])-1)*100 # robust SEs


# =============================================================
# Model (2)
# =============================================================

mmm2 <- lm(logDV~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(ETSsector)+as.factor(BVD), data=df.match.post, weights=weights)
summary(mmm2)

# Observations
nobs(mmm2)
length(mmm2$xlevels$`as.factor(ETSsector)`)
length(mmm2$xlevels$`as.factor(iso2)`)
length(mmm2$xlevels$`as.factor(BVD)`)

# Marginal effects
(exp(summary(mmm2)$coef[["foreign1","Estimate"]])-1)*100

# Standard errors
(exp(coefci(mmm2, vcov. = vcovHC, type="HC0")["foreign1",])-1)*100 # robust SEs


# =============================================================
# Mahalanobis distance matching: emissions variable
# =============================================================

# Matching data set
df.match <- df[,c("logDV","logAlloc0","foreign", "logEmit0","ETSsector","iso2","BVD", "mnc", "sample")]
df.match <- data.frame(na.omit(df.match))


# Check data before matching
m.out0 <- matchit(foreign ~ logEmit0, data=df.match, method=NULL, distance="glm")

# Matching
m.out1 <- matchit(as.numeric(foreign) ~ logEmit0, data=df.match, method="nearest", distance="mahalanobis")
df.match.post <- match.data(m.out1)
dim(df.match.post)

# Improvment in standardized mean differences
summary(m.out0)
summary(m.out1)

# Balance improvement in percent (SMD)
summary(m.out1)$sum.matched[,3]/summary(m.out1)$sum.all[,3]-1


# Improvement in univariate imbalance
imbalance(group=df.match$foreign, data=df.match["logEmit0"])
imbalance(group=df.match.post$foreign, data=df.match.post["logEmit0"])

# Balance improvement in percent (L1)
imbalance(group=df.match.post$foreign, data=df.match.post["logEmit0"])$tab$L1/imbalance(group=df.match$foreign, data=df.match["logEmit0"])$tab$L1-1


# =============================================================
# Model (1)
# =============================================================

mmm1 <- lm(logDV~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(BVD), data=df.match.post, weights=weights)
summary(mmm1)

# Observations
nobs(mmm1)
length(mmm1$xlevels$`as.factor(iso2)`)
length(mmm1$xlevels$`as.factor(BVD)`)

# Marginal effects
(exp(summary(mmm1)$coef[["foreign1","Estimate"]])-1)*100

# Standard errors
(exp(coefci(mmm1, vcov. = vcovHC, type="HC0")["foreign1",])-1)*100 # robust SEs


# =============================================================
# Model (2)
# =============================================================

mmm2 <- lm(logDV~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(ETSsector)+as.factor(BVD), data=df.match.post, weights=weights)
summary(mmm2)

# Observations
nobs(mmm2)
length(mmm2$xlevels$`as.factor(ETSsector)`)
length(mmm2$xlevels$`as.factor(iso2)`)
length(mmm2$xlevels$`as.factor(BVD)`)

# Marginal effects
(exp(summary(mmm2)$coef[["foreign1","Estimate"]])-1)*100

# Standard errors
(exp(coefci(mmm2, vcov. = vcovHC, type="HC0")["foreign1",])-1)*100 # robust SEs




# =============================================================
# Mahalanobis distance matching: both variables
# =============================================================

# Matching data set
df.match <- df[,c("logDV","logAlloc0","foreign", "logEmit0","ETSsector","iso2","BVD", "mnc", "sample")]
df.match <- data.frame(na.omit(df.match))


# Check data before matching
m.out0 <- matchit(foreign ~ logAlloc0 + logEmit0, data=df.match, method=NULL, distance="glm")

# Matching
m.out1 <- matchit(as.numeric(foreign) ~ logAlloc0 + logEmit0, data=df.match, method="nearest", distance="mahalanobis")
df.match.post <- match.data(m.out1)
dim(df.match.post)

# Improvment in standardized mean differences
summary(m.out0)
summary(m.out1)

# Balance improvement in percent (SMD)
summary(m.out1)$sum.matched[,3]/summary(m.out1)$sum.all[,3]-1


# Improvement in univariate imbalance
imbalance(group=df.match$foreign, data=df.match[c("logAlloc0", "logEmit0")])
imbalance(group=df.match.post$foreign, data=df.match.post[c("logAlloc0", "logEmit0")])

# Balance improvement in percent (L1)
imbalance(group=df.match.post$foreign, data=df.match.post[c("logAlloc0", "logEmit0")])$tab$L1/imbalance(group=df.match$foreign, data=df.match[c("logAlloc0", "logEmit0")])$tab$L1-1




# =============================================================
# Model (1)
# =============================================================

mmm1 <- lm(logDV~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(BVD), data=df.match.post, weights=weights)
summary(mmm1)

# Observations
nobs(mmm1)
length(mmm1$xlevels$`as.factor(iso2)`)
length(mmm1$xlevels$`as.factor(BVD)`)

# Marginal effects
(exp(summary(mmm1)$coef[["foreign1","Estimate"]])-1)*100

# Standard errors
(exp(coefci(mmm1, vcov. = vcovHC, type="HC0")["foreign1",])-1)*100 # robust SEs

# =============================================================
# Model (2)
# =============================================================

mmm2 <- lm(logDV~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(ETSsector)+as.factor(BVD), data=df.match.post, weights=weights)
summary(mmm2)

# Observations
nobs(mmm2)
length(mmm2$xlevels$`as.factor(ETSsector)`)
length(mmm2$xlevels$`as.factor(iso2)`)
length(mmm2$xlevels$`as.factor(BVD)`)

# Marginal effects
(exp(summary(mmm2)$coef[["foreign1","Estimate"]])-1)*100

# Standard errors
(exp(coefci(mmm2, vcov. = vcovHC, type="HC0")["foreign1",])-1)*100 # robust SEs


# =============================================================
#                       END OF CODE
# =============================================================